# %pip install pydeck
# %pip install pydeck[jupyter]Pydeck 실습
Install
pydeck 설치 가이드 - https://deckgl.readthedocs.io/en/latest/installation.html
강의자료 출처 - https://zzsza.github.io/data/2019/11/24/pydeck/
현재 pydeck은 3.7-3.9 까지만 호환
!jupyter nbextension install --sys-prefix --symlink --overwrite --py pydeck
!jupyter nbextension enable --sys-prefix --py pydeckInstalling C:\Users\user\anaconda3\envs\quartoenv\lib\site-packages\pydeck\nbextension/static -> pydeck
Removing: C:\Users\user\anaconda3\envs\quartoenv\share\jupyter\nbextensions\pydeck
Symlinking: C:\Users\user\anaconda3\envs\quartoenv\share\jupyter\nbextensions\pydeck -> C:\Users\user\anaconda3\envs\quartoenv\lib\site-packages\pydeck\nbextension\static
- Validating: ok
To initialize this nbextension in the browser every time the notebook (or other app) loads:
jupyter nbextension enable pydeck --py --sys-prefix
Enabling notebook extension pydeck/extensionRequires...
- Validating: ok
# MAPBOX_API_KEY="pk.eyJ1Ijoic3BlYXI1MzA2IiwiYSI6ImNremN5Z2FrOTI0ZGgycm45Mzh3dDV6OWQifQ.kXGWHPRjnVAEHgVgLzXn2g"공식 홈페이지 예시
import pandas as pd
import pydeck
UK_ACCIDENTS_DATA = 'https://raw.githubusercontent.com/uber-common/deck.gl-data/master/examples/3d-heatmap/heatmap-data.csv'
pd.read_csv(UK_ACCIDENTS_DATA).head()| lng | lat | |
|---|---|---|
| 0 | -0.198465 | 51.505538 |
| 1 | -0.178838 | 51.491836 |
| 2 | -0.205590 | 51.514910 |
| 3 | -0.208327 | 51.514952 |
| 4 | -0.206022 | 51.496572 |
layer = pydeck.Layer(
'HexagonLayer',
UK_ACCIDENTS_DATA,
get_position='[lng,lat]',
auto_highlight=True,
elevation_scale=50,
pickable=True,
elevation_range=[0, 3000],
extruded=True,
coverage=1,
radius=1000)
# Set the viewport location
view_state = pydeck.ViewState(
longitude=-1.415,
latitude=52.2323,
zoom=6,
min_zoom=5,
max_zoom=15,
pitch=40.5,
bearing=-27.36)
# Combined all of it and render a viewport
r = pydeck.Deck(layers=[layer], initial_view_state=view_state)
r.show()
# r.to_html('demo.html')layer.elevation_range = [0, 500]
r.update()import pydeck as pdk
pdk.Deck?Scatter Plots
import pandas as pd
from pydeck import (
data_utils,
Deck,
Layer
)
# First, let's use Pandas to download our data
URL = 'https://raw.githubusercontent.com/ajduberstein/data_sets/master/beijing_subway_station.csv'
df = pd.read_csv(URL)
df.head()| lat | lng | osm_id | station_name | chinese_name | opening_date | color | line_name | |
|---|---|---|---|---|---|---|---|---|
| 0 | 39.940249 | 116.456359 | 1351272524 | Agricultural Exhibition Center | 农业展览馆 | 2008-07-19 | [0, 146, 188, 255] | Line 10 |
| 1 | 39.955570 | 116.388507 | 5057476994 | Andelibeijie | 安德里北街 | 2015-12-26 | [0, 155, 119, 255] | Line 8 (North section) |
| 2 | 39.947729 | 116.402067 | 339088654 | Andingmen | 安定门 | 1984-09-20 | [0, 75, 135, 255] | Line 2 |
| 3 | 40.011026 | 116.263981 | 1362259113 | Anheqiao North | 安河桥北 | 2009-09-28 | [0, 140, 149, 255] | Line 4 |
| 4 | 39.967112 | 116.388398 | 5305505996 | Anhuaqiao | 安华桥 | 2012-12-30 | [0, 155, 119, 255] | Line 8 (North section) |
from ast import literal_eval
# We have to re-code position to be one field in a list, so we'll do that here:
# The CSV encodes the [R, G, B, A] color values listed in it as a string
df['color'] = df.apply(lambda x: literal_eval(x['color']), axis=1)df.head()| lat | lng | osm_id | station_name | chinese_name | opening_date | color | line_name | |
|---|---|---|---|---|---|---|---|---|
| 0 | 39.940249 | 116.456359 | 1351272524 | Agricultural Exhibition Center | 农业展览馆 | 2008-07-19 | [0, 146, 188, 255] | Line 10 |
| 1 | 39.955570 | 116.388507 | 5057476994 | Andelibeijie | 安德里北街 | 2015-12-26 | [0, 155, 119, 255] | Line 8 (North section) |
| 2 | 39.947729 | 116.402067 | 339088654 | Andingmen | 安定门 | 1984-09-20 | [0, 75, 135, 255] | Line 2 |
| 3 | 40.011026 | 116.263981 | 1362259113 | Anheqiao North | 安河桥北 | 2009-09-28 | [0, 140, 149, 255] | Line 4 |
| 4 | 39.967112 | 116.388398 | 5305505996 | Anhuaqiao | 安华桥 | 2012-12-30 | [0, 155, 119, 255] | Line 8 (North section) |
# Use pydeck's data_utils module to fit a viewport to the central 90% of the data
viewport = data_utils.compute_view(points=df[['lng', 'lat']], view_proportion=0.9)
auto_zoom_map = Deck(layers=None, initial_view_state=viewport)
auto_zoom_map.show()
# auto_zoom_map.to_html('demo.html')from IPython.core.display import display
import ipywidgets
year = 2019
scatterplot = Layer(
'ScatterplotLayer',
df,
id='scatterplot-layer',
get_radius=500,
get_fill_color='color',
get_position='[lng, lat]')
r = Deck(layers=[scatterplot], initial_view_state=viewport)
# Create an HTML header to display the year
display_el = ipywidgets.HTML('<h1>{}</h1>'.format(year))
display(display_el)
# Show the current visualization
r.show()
# r.to_html('demo.html')C:\Users\user\AppData\Local\Temp\ipykernel_17440\1358459236.py:1: DeprecationWarning: Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display
from IPython.core.display import display
import time
for y in range(1971, 2020):
scatterplot.data = df[df['opening_date'] <= str(y)]
year = y
# Reset the header to display the year
display_el.value = '<h1>{}</h1>'.format(year)
r.update()
time.sleep(0.1)Using pydeck to manipulate data
import pydeck as pdk
DATA_URL = 'https://api.data.gov.sg/v1/transport/taxi-availability'
COLOR_RANGE = [
[255, 255, 178, 25],
[254, 217, 118, 85],
[254, 178, 76, 127],
[253, 141, 60, 170],
[240, 59, 32, 212],
[189, 0, 38, 255]
]import pandas as pd
import requests
json = requests.get(DATA_URL).json()
df = pd.DataFrame(json["features"][0]["geometry"]["coordinates"])
df.columns = ['lng', 'lat']
viewport = pdk.data_utils.compute_view(df[['lng', 'lat']])
layer = pdk.Layer(
'ScreenGridLayer',
df,
cell_size_pixels=20,
color_range=COLOR_RANGE,
get_position='[lng, lat]',
pickable=True,
auto_highlight=True)
r = pdk.Deck(layers=[layer], initial_view_state=viewport)r.show()pd.DataFrame([r.deck_widget.selected_data])| 0 |
Plotting massive data sets.ipynb
import pandas as pd
all_lidar = pd.concat([
pd.read_csv('https://raw.githubusercontent.com/ajduberstein/kitti_subset/master/kitti_1.csv'),
pd.read_csv('https://raw.githubusercontent.com/ajduberstein/kitti_subset/master/kitti_2.csv'),
pd.read_csv('https://raw.githubusercontent.com/ajduberstein/kitti_subset/master/kitti_3.csv'),
pd.read_csv('https://raw.githubusercontent.com/ajduberstein/kitti_subset/master/kitti_4.csv'),
])
# Filter to one frame of data
lidar = all_lidar[all_lidar['source'] == 136]
lidar.loc[: , ['x', 'y']] = lidar[['x', 'y']] / 10000import pydeck as pdk
point_cloud = pdk.Layer(
'PointCloudLayer',
lidar[['x', 'y', 'z']],
get_position='[x, y, z * 10]',
get_normal=[0, 0, 1],
get_color=[255, 0, 100, 200],
pickable=True,
auto_highlight=True,
point_size=1)
view_state = pdk.data_utils.compute_view(lidar[['x', 'y']], 0.9)
view_state.max_pitch = 360
view_state.pitch = 80
view_state.bearing = 120
r = pdk.Deck(
point_cloud,
initial_view_state=view_state,
map_style='')
r.show()import time
from collections import deque
# Choose a handful of frames to loop through
frame_buffer = deque([42, 56, 81, 95])
print('Press the stop icon to exit')
while True:
current_frame = frame_buffer[0]
lidar = all_lidar[all_lidar['source'] == current_frame]
r.layers[0].get_position = '[x / 10000, y / 10000, z * 10]'
r.layers[0].data = lidar.to_dict(orient='records')
frame_buffer.rotate()
r.update()
time.sleep(0.5)Press the stop icon to exit
Interacting with other Jupyter widgets.ipynb
import pandas as pd
import pydeck as pdk
LIGHTS_URL = 'https://raw.githubusercontent.com/ajduberstein/lights_at_night/master/chengdu_lights_at_night.csv'
df = pd.read_csv(LIGHTS_URL)
df.head()| year | lng | lat | brightness | |
|---|---|---|---|---|
| 0 | 1993 | 104.575 | 31.808 | 4 |
| 1 | 1993 | 104.583 | 31.808 | 4 |
| 2 | 1993 | 104.592 | 31.808 | 4 |
| 3 | 1993 | 104.600 | 31.808 | 4 |
| 4 | 1993 | 104.675 | 31.808 | 4 |
df['color'] = df['brightness'].apply(lambda val: [255, val * 4, 255, 255])
df.sample(10)| year | lng | lat | brightness | color | |
|---|---|---|---|---|---|
| 35303 | 1997 | 103.883 | 29.550 | 4 | [255, 16, 255, 255] |
| 75226 | 2009 | 103.992 | 30.367 | 6 | [255, 24, 255, 255] |
| 24019 | 1997 | 103.775 | 30.800 | 4 | [255, 16, 255, 255] |
| 60366 | 2009 | 104.017 | 31.267 | 5 | [255, 20, 255, 255] |
| 84381 | 2009 | 104.983 | 29.575 | 8 | [255, 32, 255, 255] |
| 222765 | 2011 | 104.450 | 31.375 | 6 | [255, 24, 255, 255] |
| 64172 | 2009 | 104.650 | 31.000 | 7 | [255, 28, 255, 255] |
| 94879 | 2001 | 104.858 | 31.025 | 5 | [255, 20, 255, 255] |
| 64132 | 2009 | 104.133 | 31.000 | 8 | [255, 32, 255, 255] |
| 9067 | 1993 | 103.633 | 30.600 | 4 | [255, 16, 255, 255] |
plottable = df[df['year'] == 1993].to_dict(orient='records')
view_state = pdk.ViewState(
latitude=31.0,
longitude=104.5,
zoom=8,
max_zoom=8,
min_zoom=8)
scatterplot = pdk.Layer(
'HexagonLayer', # HeatmapLayer
data=plottable,
get_position='[lng, lat]',
get_weight='brightness',
opacity=0.5,
pickable=False,
get_radius=800)
r = pdk.Deck(
layers=[scatterplot],
initial_view_state=view_state,
views=[pdk.View(type='MapView', controller=None)])
r.show()import ipywidgets as widgets
from IPython.display import display
slider = widgets.IntSlider(1992, min=1993, max=2013, step=2)
def on_change(v):
results = df[df['year'] == slider.value].to_dict(orient='records')
scatterplot.data = results
r.update()
slider.observe(on_change, names='value')
display(slider)tooltip = {
"html": "<b>Elevation Value:</b> {elevationValue}",
"style": {
"backgroundColor": "steelblue",
"color": "white"
}
}Tooltip
import pydeck as pdk
layer = pdk.Layer(
'HexagonLayer',
UK_ACCIDENTS_DATA,
get_position='[lng, lat]',
auto_highlight=True,
elevation_scale=50,
pickable=True,
elevation_range=[0, 3000],
extruded=True,
coverage=1)
# Set the viewport location
view_state = pdk.ViewState(
longitude=-1.415,
latitude=52.2323,
zoom=6,
min_zoom=5,
max_zoom=15,
pitch=40.5,
bearing=-27.36)
# Combined all of it and render a viewport
r = pdk.Deck(
layers=[layer],
initial_view_state=view_state,
tooltip={
'html': '<b>Elevation Value:</b> {elevationValue}',
'style': {
'color': 'white'
}
}
)
r.show()- 그냥 텍스트로 하기
import pydeck as pdk
layer = pdk.Layer(
'HexagonLayer',
UK_ACCIDENTS_DATA,
get_position='[lng, lat]',
auto_highlight=True,
elevation_scale=50,
pickable=True,
elevation_range=[0, 3000],
extruded=True,
coverage=1)
# Set the viewport location
view_state = pdk.ViewState(
longitude=-1.415,
latitude=52.2323,
zoom=6,
min_zoom=5,
max_zoom=15,
pitch=40.5,
bearing=-27.36)
# Combined all of it and render a viewport
r = pdk.Deck(
layers=[layer],
initial_view_state=view_state,
tooltip = {
"text": "Elevation: {elevationValue}"
}
)
r.show()
- Tooltip을 그냥 True값만 주기
import pydeck as pdk
layer = pdk.Layer(
'HexagonLayer',
UK_ACCIDENTS_DATA,
get_position='[lng, lat]',
auto_highlight=True,
elevation_scale=50,
pickable=True,
elevation_range=[0, 3000],
extruded=True,
coverage=1)
# Set the viewport location
view_state = pdk.ViewState(
longitude=-1.415,
latitude=52.2323,
zoom=6,
min_zoom=5,
max_zoom=15,
pitch=40.5,
bearing=-27.36)
# Combined all of it and render a viewport
r = pdk.Deck(
layers=[layer],
initial_view_state=view_state,
tooltip=True
)
r.show()UK_ACCIDENTS_DATA = 'https://raw.githubusercontent.com/uber-common/deck.gl-data/master/examples/3d-heatmap/heatmap-data.csv'
uk_data = pd.read_csv(UK_ACCIDENTS_DATA)uk_data.head()| lng | lat | |
|---|---|---|
| 0 | -0.198465 | 51.505538 |
| 1 | -0.178838 | 51.491836 |
| 2 | -0.205590 | 51.514910 |
| 3 | -0.208327 | 51.514952 |
| 4 | -0.206022 | 51.496572 |
미국 택시 데이터 시각화
import pandas as pd
import pydata_google_auth
SCOPES = [
'https://www.googleapis.com/auth/cloud-platform',
'https://www.googleapis.com/auth/drive',
'https://www.googleapis.com/auth/bigquery'
]
credentials = pydata_google_auth.get_user_credentials(
SCOPES, auth_local_webserver=True)Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=262006177488-3425ks60hkk80fssi9vpohv88g6q1iqd.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8080%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fbigquery&state=pH3LUMkjRirs4N7GWMTcomXq5ircWi&access_type=offline
query = """
SELECT
*
FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2015`
WHERE EXTRACT(MONTH from pickup_datetime) = 1
LIMIT 10000
"""# %%time
taxi_df = pd.read_gbq(query=query, dialect='standard', project_id='persuasive-zoo-147513', credentials=credentials)GenericGBQException: Reason: 403 POST https://bigquery.googleapis.com/bigquery/v2/projects/persuasive-zoo-147513/jobs?prettyPrint=false: Access Denied: Project persuasive-zoo-147513: User does not have bigquery.jobs.create permission in project persuasive-zoo-147513.
Location: None
Job ID: 9c7a150b-0c58-4f25-ae01-5d317dabc780
taxi_df| vendor_id | pickup_datetime | dropoff_datetime | passenger_count | trip_distance | pickup_longitude | pickup_latitude | rate_code | store_and_fwd_flag | dropoff_longitude | dropoff_latitude | payment_type | fare_amount | extra | mta_tax | tip_amount | tolls_amount | imp_surcharge | total_amount | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 2015-01-02 16:26:22 | 2015-01-02 16:51:10 | 2 | 2.50 | -73.993172 | 40.762901 | <NA> | N | -73.962097 | 40.763584 | 1 | 15.7 | 1.0 | 0.5 | 3.50 | 0.0 | 0.0 | 21.00 |
| 1 | 1 | 2015-01-16 17:13:00 | 2015-01-16 17:16:10 | 1 | 0.40 | -73.961601 | 40.771229 | <NA> | N | -73.959419 | 40.775253 | 1 | 4.0 | 1.0 | 0.5 | 1.15 | 0.0 | 0.3 | 6.95 |
| 2 | 2 | 2015-01-24 04:25:01 | 2015-01-24 04:41:43 | 2 | 4.64 | -74.000595 | 40.737167 | <NA> | N | -73.995499 | 40.680763 | 1 | 16.0 | 0.5 | 0.5 | 19.50 | 0.0 | 0.3 | 36.80 |
| 3 | 2 | 2015-01-30 14:29:58 | 2015-01-30 15:27:13 | 1 | 18.39 | -73.989914 | 40.729706 | <NA> | N | -73.782310 | 40.644180 | 1 | 52.0 | 0.0 | 0.5 | 5.50 | 0.0 | 0.3 | 58.30 |
| 4 | 1 | 2015-01-14 21:24:13 | 2015-01-14 21:25:55 | 1 | 0.50 | -73.954849 | 40.773220 | <NA> | N | -73.959801 | 40.769432 | 1 | 3.5 | 0.5 | 0.5 | 0.96 | 0.0 | 0.3 | 5.76 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 9995 | 1 | 2015-01-24 22:49:17 | 2015-01-24 22:56:20 | 2 | 0.70 | -74.004578 | 40.724056 | <NA> | N | -74.006958 | 40.732971 | 1 | 6.0 | 0.5 | 0.5 | 1.80 | 0.0 | 0.3 | 9.10 |
| 9996 | 2 | 2015-01-21 18:20:27 | 2015-01-21 18:30:16 | 6 | 1.14 | -74.000038 | 40.748291 | <NA> | N | -73.990608 | 40.738071 | 1 | 8.0 | 1.0 | 0.5 | 1.80 | 0.0 | 0.3 | 11.60 |
| 9997 | 2 | 2015-01-09 20:35:23 | 2015-01-09 20:43:55 | 1 | 2.00 | -73.974876 | 40.748661 | <NA> | N | -73.980530 | 40.768021 | 1 | 8.5 | 0.5 | 0.5 | 1.80 | 0.0 | 0.3 | 11.60 |
| 9998 | 2 | 2015-01-31 22:01:33 | 2015-01-31 22:11:22 | 1 | 1.42 | -73.984718 | 40.728447 | <NA> | N | -73.975380 | 40.745564 | 1 | 8.5 | 0.5 | 0.5 | 1.80 | 0.0 | 0.3 | 11.60 |
| 9999 | 2 | 2015-01-13 13:40:14 | 2015-01-13 13:51:28 | 5 | 1.57 | -73.981232 | 40.747498 | <NA> | N | -73.998116 | 40.733883 | 1 | 9.0 | 0.0 | 0.5 | 1.80 | 0.0 | 0.3 | 11.60 |
10000 rows × 19 columns
GridLayer
- 10만개 데이터
arc_layer = pdk.Layer(
'GridLayer',
taxi_df,
get_position='[pickup_longitude, pickup_latitude]',
pickable=True,
auto_highlight=True,
tooltip=True
)
nyc_center = [-73.9808, 40.7648]
view_state = pdk.ViewState(longitude=nyc_center[0], latitude=nyc_center[1], zoom=9)
r = pdk.Deck(layers=[arc_layer], initial_view_state=view_state)
r.show()Arc Layer
zip_code_query = """
WITH base_data AS
(
SELECT
nyc_taxi.*,
pickup.zip_code as pickup_zip_code,
pickup.internal_point_lat as pickup_zip_code_lat,
pickup.internal_point_lon as pickup_zip_code_lon,
dropoff.zip_code as dropoff_zip_code,
dropoff.internal_point_lat as dropoff_zip_code_lat,
dropoff.internal_point_lon as dropoff_zip_code_lon
FROM (
SELECT *
FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2015`
WHERE
EXTRACT(MONTH from pickup_datetime) = 1
and pickup_latitude <= 90 and pickup_latitude >= -90
and dropoff_latitude <= 90 and dropoff_latitude >= -90
) AS nyc_taxi
JOIN (
SELECT zip_code, state_code, state_name, city, county, zip_code_geom, internal_point_lat, internal_point_lon
FROM `bigquery-public-data.geo_us_boundaries.zip_codes`
WHERE state_code='NY'
) AS pickup
ON ST_CONTAINS(pickup.zip_code_geom, st_geogpoint(pickup_longitude, pickup_latitude))
JOIN (
SELECT zip_code, state_code, state_name, city, county, zip_code_geom, internal_point_lat, internal_point_lon
FROM `bigquery-public-data.geo_us_boundaries.zip_codes`
WHERE state_code='NY'
) AS dropoff
ON ST_CONTAINS(dropoff.zip_code_geom, st_geogpoint(dropoff_longitude, dropoff_latitude))
)
SELECT
*
FROM base_data
limit 10000
"""%%time
taxi_df_by_zipcode = pd.read_gbq(query=zip_code_query, dialect='standard', project_id='persuasive-zoo-147513', credentials=credentials)Wall time: 36.5 s
taxi_df_by_zipcode.head(3)| vendor_id | pickup_datetime | dropoff_datetime | passenger_count | trip_distance | pickup_longitude | pickup_latitude | rate_code | store_and_fwd_flag | dropoff_longitude | ... | tip_amount | tolls_amount | imp_surcharge | total_amount | pickup_zip_code | pickup_zip_code_lat | pickup_zip_code_lon | dropoff_zip_code | dropoff_zip_code_lat | dropoff_zip_code_lon | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | 2015-01-20 09:57:47 | 2015-01-20 10:14:24 | 1 | 2.54 | -74.014793 | 40.714111 | <NA> | N | -73.991997 | ... | 2.5 | 0.0 | 0.3 | 15.8 | 10280 | 40.709073 | -74.016423 | 10003 | 40.731829 | -73.989181 |
| 1 | 1 | 2015-01-15 04:50:46 | 2015-01-15 05:01:29 | 1 | 2.30 | -74.015938 | 40.710976 | <NA> | N | -73.996552 | ... | 0.0 | 0.0 | 0.3 | 11.3 | 10280 | 40.709073 | -74.016423 | 10011 | 40.742043 | -74.000620 |
| 2 | 2 | 2015-01-22 09:31:00 | 2015-01-22 09:50:42 | 1 | 4.13 | -73.989738 | 40.701981 | <NA> | N | -74.007828 | ... | 2.0 | 0.0 | 0.3 | 19.8 | 11201 | 40.693700 | -73.989859 | 10011 | 40.742043 | -74.000620 |
3 rows × 25 columns
arc_layer = pdk.Layer(
'ArcLayer',
taxi_df_by_zipcode,
get_source_position='[pickup_zip_code_lon, pickup_zip_code_lat]',
get_target_position='[dropoff_zip_code_lon, dropoff_zip_code_lat]',
get_source_color='[255, 255, 120]',
get_target_color='[255, 0, 0]',
get_widht='elevationValue',
pickable=True,
auto_highlight=True,
)
nyc_center = [-73.9808, 40.7648]
view_state = pdk.ViewState(longitude=nyc_center[0], latitude=nyc_center[1], zoom=9)
r = pdk.Deck(layers=[arc_layer], initial_view_state=view_state)
r.show()Aggregate
agg_query = """
WITH base_data AS
(
SELECT
nyc_taxi.*,
pickup.zip_code as pickup_zip_code,
pickup.internal_point_lat as pickup_zip_code_lat,
pickup.internal_point_lon as pickup_zip_code_lon,
dropoff.zip_code as dropoff_zip_code,
dropoff.internal_point_lat as dropoff_zip_code_lat,
dropoff.internal_point_lon as dropoff_zip_code_lon
FROM (
SELECT *
FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2015`
WHERE
EXTRACT(MONTH from pickup_datetime) = 1
and pickup_latitude <= 90 and pickup_latitude >= -90
and dropoff_latitude <= 90 and dropoff_latitude >= -90
) AS nyc_taxi
JOIN (
SELECT zip_code, state_code, state_name, city, county, zip_code_geom, internal_point_lat, internal_point_lon
FROM `bigquery-public-data.geo_us_boundaries.zip_codes`
WHERE state_code='NY'
) AS pickup
ON ST_CONTAINS(pickup.zip_code_geom, st_geogpoint(pickup_longitude, pickup_latitude))
JOIN (
SELECT zip_code, state_code, state_name, city, county, zip_code_geom, internal_point_lat, internal_point_lon
FROM `bigquery-public-data.geo_us_boundaries.zip_codes`
WHERE state_code='NY'
) AS dropoff
ON ST_CONTAINS(dropoff.zip_code_geom, st_geogpoint(dropoff_longitude, dropoff_latitude))
)
SELECT
pickup_zip_code,
pickup_zip_code_lat,
pickup_zip_code_lon,
dropoff_zip_code,
dropoff_zip_code_lat,
dropoff_zip_code_lon,
COUNT(*) AS cnt
FROM base_data
GROUP BY 1,2,3,4,5,6
limit 10000
"""%%time
agg_df = pd.read_gbq(query=agg_query, dialect='standard', project_id='persuasive-zoo-147513', credentials=credentials)Wall time: 15.9 s
agg_df.head()| pickup_zip_code | pickup_zip_code_lat | pickup_zip_code_lon | dropoff_zip_code | dropoff_zip_code_lat | dropoff_zip_code_lon | cnt | |
|---|---|---|---|---|---|---|---|
| 0 | 11693 | 40.590916 | -73.809715 | 11414 | 40.657604 | -73.844804 | 1 |
| 1 | 10040 | 40.858314 | -73.930494 | 10040 | 40.858314 | -73.930494 | 139 |
| 2 | 10473 | 40.818690 | -73.858474 | 10030 | 40.818267 | -73.942856 | 1 |
| 3 | 10451 | 40.820454 | -73.925066 | 10031 | 40.825288 | -73.950045 | 93 |
| 4 | 11209 | 40.621993 | -74.030134 | 11228 | 40.616698 | -74.013066 | 28 |
agg_df = agg_df.sort_values('cnt', ascending=False)agg_df = agg_df[:100]
arc_layer = pdk.Layer(
'ArcLayer',
agg_df,
get_source_position='[pickup_zip_code_lon, pickup_zip_code_lat]',
get_target_position='[dropoff_zip_code_lon, dropoff_zip_code_lat]',
get_source_color='[255, 255, 120]',
get_target_color='[255, 0, 0]',
width_units='meters',
get_width="1+10*cnt/500",
pickable=True,
auto_highlight=True,
)
nyc_center = [-73.9808, 40.7648]
view_state = pdk.ViewState(longitude=nyc_center[0], latitude=nyc_center[1], zoom=9)
r = pdk.Deck(layers=[arc_layer], initial_view_state=view_state,
tooltip={
'html': '<b>count:</b> {cnt}',
'style': {
'color': 'white'
}
}
)
r.show()요일별 위젯
agg_query2 = """
WITH base_data AS
(
SELECT
nyc_taxi.*,
pickup.zip_code as pickup_zip_code,
pickup.internal_point_lat as pickup_zip_code_lat,
pickup.internal_point_lon as pickup_zip_code_lon,
dropoff.zip_code as dropoff_zip_code,
dropoff.internal_point_lat as dropoff_zip_code_lat,
dropoff.internal_point_lon as dropoff_zip_code_lon
FROM (
SELECT *
FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2015`
WHERE
EXTRACT(MONTH from pickup_datetime) = 1
and pickup_latitude <= 90 and pickup_latitude >= -90
and dropoff_latitude <= 90 and dropoff_latitude >= -90
LIMIT 100000
) AS nyc_taxi
JOIN (
SELECT zip_code, state_code, state_name, city, county, zip_code_geom, internal_point_lat, internal_point_lon
FROM `bigquery-public-data.geo_us_boundaries.zip_codes`
WHERE state_code='NY'
) AS pickup
ON ST_CONTAINS(pickup.zip_code_geom, st_geogpoint(pickup_longitude, pickup_latitude))
JOIN (
SELECT zip_code, state_code, state_name, city, county, zip_code_geom, internal_point_lat, internal_point_lon
FROM `bigquery-public-data.geo_us_boundaries.zip_codes`
WHERE state_code='NY'
) AS dropoff
ON ST_CONTAINS(dropoff.zip_code_geom, st_geogpoint(dropoff_longitude, dropoff_latitude))
)
SELECT
CAST(format_datetime('%u', pickup_datetime) AS INT64) -1 AS weekday,
pickup_zip_code,
pickup_zip_code_lat,
pickup_zip_code_lon,
dropoff_zip_code,
dropoff_zip_code_lat,
dropoff_zip_code_lon,
COUNT(*) AS cnt
FROM base_data
GROUP BY 1,2,3,4,5,6,7
"""%%time
agg_df2 = pd.read_gbq(query=agg_query2, dialect='standard', project_id='persuasive-zoo-147513', credentials=credentials)Wall time: 7.41 s
agg_df2.head()| weekday | pickup_zip_code | pickup_zip_code_lat | pickup_zip_code_lon | dropoff_zip_code | dropoff_zip_code_lat | dropoff_zip_code_lon | cnt | |
|---|---|---|---|---|---|---|---|---|
| 0 | 4 | 11214 | 40.599148 | -73.996090 | 10035 | 40.795458 | -73.929570 | 1 |
| 1 | 6 | 10171 | 40.755899 | -73.973858 | 11430 | 40.646809 | -73.786169 | 2 |
| 2 | 5 | 10461 | 40.847394 | -73.840583 | 10475 | 40.874375 | -73.823656 | 1 |
| 3 | 0 | 10172 | 40.755273 | -73.974315 | 10065 | 40.764628 | -73.963144 | 1 |
| 4 | 6 | 10162 | 40.769308 | -73.949924 | 11430 | 40.646809 | -73.786169 | 1 |
default_data = agg_df2[agg_df2['weekday'] == 0].to_dict(orient='records')arc_layer = pdk.Layer(
'ArcLayer',
default_data,
get_source_position='[pickup_zip_code_lon, pickup_zip_code_lat]',
get_target_position='[dropoff_zip_code_lon, dropoff_zip_code_lat]',
get_source_color='[255, 255, 120]',
get_target_color='[255, 0, 0]',
width_units='meters',
get_width="1+10*cnt/500",
pickable=True,
auto_highlight=True,
)
nyc_center = [-73.9808, 40.7648]
view_state = pdk.ViewState(longitude=nyc_center[0], latitude=nyc_center[1], zoom=9)
r = pdk.Deck(layers=[arc_layer], initial_view_state=view_state,
tooltip={
'html': '<b>count:</b> {cnt}',
'style': {
'color': 'white'
}
}
)
r.show()# Widget 슬라이더 생성
import ipywidgets as widgets
from IPython.display import display
slider = widgets.IntSlider(0, min=0, max=6, step=1)
# Widget에서 사용할 함수 정의
def on_change(v):
results = agg_df2[agg_df2['weekday'] == slider.value].to_dict(orient='records')
arc_layer.data = results
r.update()
# Deck과 슬라이더 연결
slider.observe(on_change, names='value')
display(slider)